Task 1: Aspatial Operations

Task 1.1

#group by state then summarize costs and drop any  na field values
task1.1 <- bmps %>% group_by(StateAbbreviation) %>% 
  summarize(stateCost = sum(Cost, na.rm = TRUE))

task1.1
## # A tibble: 7 x 2
##   StateAbbreviation  stateCost
##   <chr>                  <dbl>
## 1 DC                 15623736.
## 2 DE                 41329130.
## 3 MD                627164538.
## 4 NY                 15273818.
## 5 PA                562922554.
## 6 VA                723866417.
## 7 WV                 83023958.

Task 1.2

#log transform skewed data
bmps$logCost <- log(bmps$Cost)
bmps$logTotalAmountCredited <- log(bmps$TotalAmountCredited)

#filter dataset to rows where units are acres and plot log transformed data
task1.2 <- bmps %>% filter(., Unit == "Acres") %>% 
  ggplot(., aes(x=logCost, y=logTotalAmountCredited)) +
  geom_point()
  
task1.2
## Warning: Removed 170 rows containing missing values (geom_point).

Task 1.3

#see unique BMPs for cover crops to get idea of spelling
uni.bmps <- c(unique(bmps$BMPShortName))
uni.bmps
##   [1] "wetpondwetland"           "dryponds"                
##   [3] "filter"                   "rr"                      
##   [5] "bioretudab"               "bioswale"                
##   [7] "vegopchannoudab"          "permpavnosvnoudab"       
##   [9] "bioretudcd"               "urbstrmrest"             
##  [11] "infiltration"             "impsurred"               
##  [13] "st"                       "urbantreeplant"          
##  [15] "scp1"                     "scp2"                    
##  [17] "scp3"                     "scp4"                    
##  [19] "scp5"                     "scp6"                    
##  [21] "csoconnect"               "eands1"                  
##  [23] "conplan"                  "forestbuffers"           
##  [25] "treeplant"                "landretireopen"          
##  [27] "grassbuffers"             "oswnofence"              
##  [29] "precrotgrazing"           "watercontstruc"          
##  [31] "extdryponds"              "infiltwithsv"            
##  [33] "forharvestbmp"            "covercroptradwed"        
##  [35] "covercroptradweo"         "barnrunoffcont"          
##  [37] "covercroptradwnd"         "covercroptradwno"        
##  [39] "covercroptradwlo"         "covercroptradbed"        
##  [41] "covercroptradbeo"         "covercroptradbea"        
##  [43] "covercroptradbno"         "covercropcomearly"       
##  [45] "covercropcomnormal"       "covercropcomlate"        
##  [47] "conservetill"             "nmcoren"                 
##  [49] "nmcorep"                  "grassbuffexcl"           
##  [51] "covercroptradtno"         "hrtill"                  
##  [53] "septicdecon"              "septicconnect"           
##  [55] "septicpump"               "wetlandrestorefloodplain"
##  [57] "grassbuffexcleffn"        "grassbufferseffn"        
##  [59] "forestbufferseffn"        "forestbufferseffps"      
##  [61] "grassbufferseffps"        "grassbuffexcleffps"      
##  [63] "wetlandrestoreeff"        "forestbufurban"          
##  [65] "nonurbstrmrest"           "covercroptradreo"        
##  [67] "covercroptradrea"         "covercroptradrnd"        
##  [69] "covercroptradrno"         "covercroptradrld"        
##  [71] "covercroptradrlo"         "covercroptradwea"        
##  [73] "covercroptradwld"         "covercroptradbnd"        
##  [75] "covercroptradfeo"         "covercroptradfea"        
##  [77] "covercroptradleo"         "covercroptradlea"        
##  [79] "covercroptradted"         "covercroptradteo"        
##  [81] "covercroptradtea"         "covercroptradtld"        
##  [83] "covercroptradtlo"         "covercroptradarea"       
##  [85] "scp11"                    "covercroptradokeo"       
##  [87] "covercroptradbreo"        "covercroptradbrea"       
##  [89] "forestbufurbaneff"        "advancedgi"              
##  [91] "grassbuffexclnar"         "landretirepas"           
##  [93] "forestbuffexcl"           "horsepasman"             
##  [95] "imperviousdisconnection"  "covercroptradred"        
##  [97] "urbanforplant"            "loaflot"                 
##  [99] "injection"                "incorplowlate"           
## [101] "urbannmmdca"              "urbannmmddiy"            
## [103] "covercroptradtnd"         "eands2"                  
## [105] "forestbuffexcleffn"       "forestbuffexcleffps"     
## [107] "covercroptradohed"        "covercroptradohea"       
## [109] "covercroptradbred"        "wetlandcreatefloodplain" 
## [111] "wetlandcreateeff"         "forestbuffexclnar"       
## [113] "shoreurb"                 "dirtgraveldsa"           
## [115] "shoreag"                  "covercroptradared"       
## [117] "covercroptradohnd"        "carseqaltcrop"           
## [119] "abanminerec"              "covercroptradnutrnd"     
## [121] "lowrestill"               "nmraten"                 
## [123] "nmratep"                  "nmplacen"                
## [125] "nmplacep"                 "nmtimen"                 
## [127] "nmtimep"                  "forestbuffnarrow"        
## [129] "grassbuffnarrow"          "covercroptradnutrno"     
## [131] "wetlandenhance"           "wetlandenhanceeff"       
## [133] "shoreagnoveg"             "shoreurbnoveg"           
## [135] "covercroptradlnd"         "urbannmplan"             
## [137] "permpavnosvudcd"          "septicsecenhance"        
## [139] "septicseccon"             "vegopchannoudcd"         
## [141] "permpavsvnoudab"          "permpavsvudcd"           
## [143] "covercroptradnutweo"      "septicdeenhance"         
## [145] "bioretnoudab"             "septiceffenhance"        
## [147] "shoreurbveg"              "permpavsvudab"           
## [149] "permpavnosvudab"          "urbfilterrr"             
## [151] "urbfilterst"              "covercroptradnutreo"     
## [153] "covercroptradnutwno"      "covercroptradfed"        
## [155] "covercroptradlgled"       "covercroptradlgleo"      
## [157] "covercroptradled"         "covercroptradfped"       
## [159] "covercroptradfpnd"        "covercroptradlglnd"
#filter BMPShortName to rows with cover in them, checked using uni.bmps
task1.3 <- bmps %>% filter(., stringr::str_detect(BMPShortName, "cover")
                           & TotalAmountCredited > 50 #remove skew from all 0's
                           & TotalAmountCredited < 1000) %>% #makes boxplot more readable
  ggplot(., aes(x=StateAbbreviation, y=TotalAmountCredited)) + #plot
  geom_boxplot()
  
task1.3

Task 1.4

#use filter to remove rows where where year is 0
task1.4 <- dams %>% as_tibble() %>% #convert to tibble for swifter data handling
  filter(., YEAR != 0) %>%  # filter not equal to 0
  ggplot(., aes(x=YEAR, y=STATE)) + #plot
  geom_point(aes(color = OWNER_CODE)) #color by owner code

task1.4

Task 1.5

#sum length of streams per HUC4 code
strm.len <- streams %>% as_tibble() %>% #convert to tibble
  filter(., HUC4 != "0206") %>% #no 0206 dams and stream lenghts are minimal
  group_by(HUC4) %>% #goup by HUC4 code
  summarise(., totLen_KM = sum(LengthKM)) %>%  #sum stream lengths
  ggplot(., aes(x=HUC4, y=totLen_KM)) +
  geom_col(aes(fill = HUC4), show.legend = FALSE) + #don't show legend as legend from dam plot will suffice
  labs(y= "Total Length of Streams (km)",
       caption = "HUC4 code 0206 removed due to lack of dams in basin")

#count of dams per HUC4 code
dam.count <- dams %>% as_tibble() %>% #convert to tibble
  filter(., !is.na(HUC4)) %>% #drop HUC4 is NA
  group_by(HUC4) %>% #group by HUC4 code
  tally() %>%  #tally counts of HUC4
  ggplot(., aes(x=HUC4, y=n)) +
  geom_col(aes(fill = HUC4)) +
  labs(y= "Dam Quantity",
       caption = "No dams recorded in HUC4 0206")

#plot both plots in grid
task1.5 <- gridExtra::grid.arrange(strm.len, dam.count, nrow=1, top = "Correlation between stream length released and number of removed dams")

task1.5
## TableGrob (2 x 2) "arrange": 3 grobs
##   z     cells    name                grob
## 1 1 (2-2,1-1) arrange      gtable[layout]
## 2 2 (2-2,2-2) arrange      gtable[layout]
## 3 3 (1-1,1-2) arrange text[GRID.text.320]

Task 2: Spatial Operations

Task 2.1

###assume lines have no lengths to make spatial
#check crs of streams to see unit length
st_crs(streams) #length units =  meters
## Coordinate Reference System:
##   User input: WGS 84 
##   wkt:
## GEOGCRS["WGS 84",
##     DATUM["World Geodetic System 1984",
##         ELLIPSOID["WGS 84",6378137,298.257223563,
##             LENGTHUNIT["metre",1]]],
##     PRIMEM["Greenwich",0,
##         ANGLEUNIT["degree",0.0174532925199433]],
##     CS[ellipsoidal,2],
##         AXIS["latitude",north,
##             ORDER[1],
##             ANGLEUNIT["degree",0.0174532925199433]],
##         AXIS["longitude",east,
##             ORDER[2],
##             ANGLEUNIT["degree",0.0174532925199433]],
##     ID["EPSG",4326]]
#calculate length of each line in km
streams$len_KM <- as.vector(sf::st_length(streams) / 1000) #1000 m per km

#filter out NA, then sum stream lengths
task2.1 <- streams %>% filter(., !is.na(GNIS_Name)) %>%  #remove NA since we don't know what stream they are from
  group_by(GNIS_Name) %>% #Group by stream name
  summarise(totStreamLen_KM = sum(len_KM)) %>% #sum length per stream
  slice_max(n=5, order_by = totStreamLen_KM) #slice 5 longest streams
  
task2.1
## Simple feature collection with 5 features and 2 fields
## Geometry type: GEOMETRY
## Dimension:     XY
## Bounding box:  xmin: -79.20828 ymin: 37.96212 xmax: -77.96634 ymax: 40.43248
## Geodetic CRS:  WGS 84
## # A tibble: 5 x 3
##   GNIS_Name           totStreamLen_KM                                   geometry
##   <chr>                         <dbl>                             <GEOMETRY [°]>
## 1 Hazel River                    45.4 LINESTRING (-78.24825 38.60204, -78.24823~
## 2 South River                    44.8 LINESTRING (-79.20828 37.96331, -79.20808~
## 3 Great Trough Creek             43.5 MULTILINESTRING ((-78.13179 40.16831, -78~
## 4 Hughes River                   22.5 MULTILINESTRING ((-78.24041 38.53904, -78~
## 5 Little Trough Creek            22.1 LINESTRING (-77.96634 40.43248, -77.9664 ~

Task 2.2

# intersect streams and counties, group by county, sum stream length and slice
task2.2 <- sf::st_intersection(streams, counties) %>% 
  as_tibble() %>%  #convert to tibble for faster processing
  group_by(GEOID10) %>%  # group by county fips code
  summarise(totStrmLen = sum(LengthKM)) %>%  #calculate total stream length per county
  slice_max(n=3, order_by = totStrmLen)  #slice highest three total stream length
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
task2.2
## # A tibble: 3 x 2
##   GEOID10 totStrmLen
##   <chr>        <dbl>
## 1 51015         425.
## 2 42061         309.
## 3 42023         242.

Task 2.3

#create GEOID column in bmps from subset GeographyName
bmps$GEOID = stringr::str_sub(bmps$GeographyName, 1, 5)

#sum cost of bmps per geoid
tot.cost <- bmps %>% filter(., Cost > 0) %>% #filter out values of 0
  group_by(GEOID) %>% #GEOID to get a single value to join to counties later
  summarise(., TotalBMPCosts = sum(Cost))

#join tot.cost to counties on GEOID, drop counties with no BMP costs
task2.3 <- left_join(counties, tot.cost, by = c("GEOID10"="GEOID")) %>% 
  filter(., TotalBMPCosts > 0)

tm_shape(task2.3) + tm_polygons(col = "TotalBMPCosts")

Task 2.4

#use use spatial join with nearest feature predicate
task2.4 <- st_join(dams, streams, st_nearest_feature) %>% 
  select(., c("DAM_NAME.x", "GNIS_Name", "ComID")) #pare dataset down

task2.4
## Simple feature collection with 34 features and 3 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -79.03091 ymin: 37.23352 xmax: -75.53803 ymax: 41.71974
## Geodetic CRS:  WGS 84
## # A tibble: 34 x 4
##    DAM_NAME.x             GNIS_Name                                ComID             geometry
##    <chr>                  <chr>                                    <int>          <POINT [°]>
##  1 BARREN CREEK DAM       Barren Creek                         125993386  (-75.73443 38.4618)
##  2 KLADDER RESERVOIR      <NA>                                  65609396  (-78.3887 40.38991)
##  3 Lake Lehman Dam        <NA>                                  57472299 (-76.86169 39.86292)
##  4 PICRIC                 Driftwood Branch Sinnemahoning Creek  61428128  (-78.2551 41.51775)
##  5 DUCK MARSH POND NO. 26 <NA>                                  61828005 (-78.32606 41.23462)
##  6 LONG RUN               Long Run                              67175720 (-77.35531 41.06447)
##  7 TROUGH CREEK           <NA>                                  65840659 (-78.12468 40.33215)
##  8 OLYPHANT NO 1          Grassy Island Creek                   65629869 (-75.53803 41.46384)
##  9 HEISTAND SAWMILL       Chiques Creek                         57464483 (-76.52639 40.05524)
## 10 SICO                   Little Chiques Creek                  57463235 (-76.49158 40.11082)
## # ... with 24 more rows

Task 2.5

# intersect dams and counties, group by state, and then tally
task2.5 <- st_intersection(dams, counties) %>% 
  as_tibble() %>% #convert to tibble for faster processing
  group_by(STATEFP10) %>% 
  tally(name = "removed.dams")
## Warning: attribute variables are assumed to be spatially constant throughout all
## geometries
task2.5
## # A tibble: 3 x 2
##   STATEFP10 removed.dams
##   <chr>            <int>
## 1 24                   2
## 2 42                  27
## 3 51                   5